mask_siteid_sampling <- site_protocol_quanti[
site_protocol_quanti$variable == "year" &
site_protocol_quanti$n >= 10,
]$siteid
mask_siteid_protocol <- site_protocol_quali[
site_protocol_quali$unitabundance %in% c("Count", "Ind.100m2"), ]$siteid
mask_siteid <- mask_siteid_sampling[
mask_siteid_sampling %in% mask_siteid_protocol]
dir_plot <- here("doc", "fig", "raw_data")
if (!dir.exists(dir_plot)) {
dir.create(dir_plot)
}
abun_rich_nested <- filtered_dataset$abun_rich_op %>%
nest_by(siteid)
library(furrr)
plan(multisession, workers = 3)
future_walk2(abun_rich_nested$data, abun_rich_nested$siteid,
function (x, y, ...) {
png(paste0(dir_plot, "/species_nb_site_", y, ".png"), width = 500, height = 500*1/1.6)
p <- plot_community_data(
dataset = x, y = "species_nb", x = "year", title = y)
print(p)
dev.off()
})
future_walk2(abun_rich_nested$data, abun_rich_nested$siteid,
function (x, y, ...) {
png(paste0(dir_plot, "/species_nb_site_", y, ".png"), width = 500, height = 500*1/1.6)
p <- plot_community_data(
dataset = x, y = "total_abundance", x = "year", title = y)
print(p)
dev.off()
})
library(mapview)
#> Warning: no function found corresponding to methods exports from 'raster' for:
#> 'direction', 'gridDistance'
library(leafpop)
tar_load(filtered_dataset)
loc <- filtered_dataset$location %>%
left_join(filtered_dataset$site_quali, by = "siteid") %>%
st_as_sf(coords = c("longitude", "latitude"),
crs = 4326)
site_richness <- filtered_dataset$measurement %>%
group_by(siteid) %>%
summarise(tot_nb_species = length(unique(species)))
op_richness_summary <-
filtered_dataset$abun_rich_op %>%
group_by(siteid) %>%
summarise(enframe(summary_distribution(species_nb)), .groups = "drop") %>%
pivot_wider(names_from = "name", values_from = "value") %>%
rename(median_richness = median)
var_map_view <- c("siteid", "protocol", "unitabundance", "unitbiomass", "min",
"max", "completeness", "tot_nb_richness", "median_richness")
loc2 <- loc %>%
left_join(op_richness_summary, by = "siteid") %>%
select(any_of(var_map_view)) %>%
left_join(site_richness, by = "siteid") %>%
select(any_of(var_map_view))
mapView(loc2, zcol = "protocol")
get_file_plot_in_tbl <- function(
directory = NULL,
str_file_to_match = NULL,
regex_pattern = NULL) {
file_plot_site <- list.files(directory, full.names = TRUE)
filtered_file_plot_site <- file_plot_site[
str_detect(file_plot_site, str_file_to_match)]
names(filtered_file_plot_site) <- str_extract(filtered_file_plot_site,
regex_pattern)
filtered_file_plot_site <- enframe(
filtered_file_plot_site,
name = "siteid",
value = "file"
)
}
abun_file_plot_site_tbl <- get_file_plot_in_tbl(
directory = "fig/raw_data",
str_file_to_match = "tot_abun",
regex_pattern = "S\\d+"
)
richness_file_plot_site_tbl <- get_file_plot_in_tbl(
directory = dir_plot,
str_file_to_match = "species_nb",
regex_pattern = "S\\d+") %>%
rename(richness = file)
loc2 <- loc %>%
select(siteid, protocol) %>%
left_join(abun_file_plot_site_tbl, by = "siteid") %>%
left_join(richness_file_plot_site_tbl, by = "siteid")
m <- mapView(loc2, zcol = "protocol",
popup = popupImage(loc2$file)
)
mapshot(m, url = "map_abundance.html",
selfcontained = FALSE,
)
m2 <- mapView(loc2, zcol = "protocol",
popup = popupImage(loc2$richness)
)
mapshot(m2, url = "map_richness.html",
selfcontained = FALSE
)
trends_data <- abun_rich_op %>%
left_join(op_protocol, by = "op_id") %>%
filter(siteid %in% mask_siteid) %>%
mutate(
log_total_abundance = log(total_abundance),
log_species_nb = log(species_nb)
)
plot_trends <- trends_data %>%
group_by(siteid) %>%
nest() %>%
ungroup() %>%
slice_sample(n = 100) %>%
mutate(
p_abun = map2(data, siteid,
~plot_community_data(
dataset = .x, y = "total_abundance", x = "year", title = .y)),
p_rich = map2(data, siteid,
~plot_community_data(
dataset = .x, y = "species_nb", x = "year", title = .y),
)
)
n_plot_by_batch <- 8
map(
split(
seq_len(nrow(plot_trends)),
1:floor(nrow(plot_trends) / n_plot_by_batch) + 1),
~plot_grid(plotlist = plot_trends[.x, ]$p_abun)
)
#> Warning in split.default(seq_len(nrow(plot_trends)), 1:floor(nrow(plot_trends)/
#> n_plot_by_batch) + : data length is not a multiple of split variable
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
#> parametric, : pseudoinverse used at 2007
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
#> parametric, : neighborhood radius 2
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
#> parametric, : reciprocal condition number 0
#> Warning in predLoess(object$y, object$x, newx = if
#> (is.null(newdata)) object$x else if (is.data.frame(newdata))
#> as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
#> 2007
#> Warning in predLoess(object$y, object$x, newx = if
#> (is.null(newdata)) object$x else if (is.data.frame(newdata))
#> as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius 2
#> Warning in predLoess(object$y, object$x, newx = if
#> (is.null(newdata)) object$x else if (is.data.frame(newdata))
#> as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
#> number 0
#> $`2`
#>
#> $`3`
#>
#> $`4`
#>
#> $`5`
#>
#> $`6`
#>
#> $`7`
#>
#> $`8`
#>
#> $`9`
#>
#> $`10`
#>
#> $`11`
#>
#> $`12`
#>
#> $`13`
map(
split(
seq_len(nrow(plot_trends)),
1:floor(nrow(plot_trends) / n_plot_by_batch) + 1
),
~plot_grid(plotlist = plot_trends[.x, ]$p_rich)
)
#> Warning in split.default(seq_len(nrow(plot_trends)), 1:floor(nrow(plot_trends)/
#> n_plot_by_batch) + : data length is not a multiple of split variable
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
#> parametric, : pseudoinverse used at 2007
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
#> parametric, : neighborhood radius 2
#> Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
#> parametric, : reciprocal condition number 0
#> Warning in predLoess(object$y, object$x, newx = if
#> (is.null(newdata)) object$x else if (is.data.frame(newdata))
#> as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
#> 2007
#> Warning in predLoess(object$y, object$x, newx = if
#> (is.null(newdata)) object$x else if (is.data.frame(newdata))
#> as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius 2
#> Warning in predLoess(object$y, object$x, newx = if
#> (is.null(newdata)) object$x else if (is.data.frame(newdata))
#> as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
#> number 0
#> $`2`
#>
#> $`3`
#>
#> $`4`
#>
#> $`5`
#>
#> $`6`
#>
#> $`7`
#>
#> $`8`
#>
#> $`9`
#>
#> $`10`
#>
#> $`11`
#>
#> $`12`
#>
#> $`13`
tar_load(toy_dataset)
unique(toy_dataset$siteid)
#> [1] "S8633" "S11138" "S534" "S529" "S11219"
plot_temporal_biomass <- function (bm_data = NULL, biomass_var = NULL, com = NULL, .log = FALSE) {
#main_title <- paste0("Stab = ", round(1/(sync$cv_com), 2),", ", "Sync = ",
#round(sync$synchrony, 2),", ", "CVsp = ", round(sync$cv_sp, 2))
sym_bm_var <- rlang::sym(biomass_var)
# Total
total_biomass <- bm_data %>%
group_by(date) %>%
summarise(!!sym_bm_var := sum(!!sym_bm_var, na.rm = FALSE))
p <- bm_data %>%
mutate(label = if_else(date == max(date), as.character(species), NA_character_)) %>%
ggplot(aes_string(x = "date", y = biomass_var, color = "species")) +
geom_line() +
lims(y = c(0, max(total_biomass[[biomass_var]]))) +
labs(
#title = main_title, subtitle = paste0("Station: ", station),
y = "Biomass (g)", x = "Sampling date"
) +
ggrepel::geom_label_repel(aes(label = label),
size = 2.5, nudge_x = 1, na.rm = TRUE)
# Add total biomass
p2 <- p +
geom_line(data = total_biomass, aes(color = "black", size = 3)) +
theme(legend.position = "none")
# Add summary: richness, connectance, stab, t_lvl, sync, cv_sp
com %<>%
mutate_if(is.double, round(., 2))
label <- paste(
"S = ", com$bm_std_stab,
"sync = ", com$sync,
"CVsp = ", com$cv_sp,
"R = ", com$rich_tot_std,
"C = ", com$ct,
"Tlvl = ", com$t_lvl
)
p3 <- p2 +
annotate("text", x = median(total_biomass$date),
y = 15, label = label)
if (.log) {
p3 <- p3 + scale_y_log10()
}
return(p3)
}
ti <- toy_dataset %>%
filter(siteid == unique(toy_dataset$siteid)[2])
plot_population <- function (dataset = NULL, y_var = NULL, time_var = NULL) {
sym_y_var <- rlang::sym(y_var)
sym_time_var <- rlang::sym(time_var)
# Total
total_dataset <- dataset %>%
group_by(!!sym_time_var) %>%
summarise(!!sym_y_var := sum(!!sym_y_var, na.rm = FALSE))
p <- dataset %>%
mutate(label = if_else(!!sym_time_var == max(!!sym_time_var), as.character(species), NA_character_)) %>%
ggplot(aes_string(x = time_var, y = y_var, color = "species")) +
geom_line() +
lims(y = c(0, max(total_dataset[[y_var]]))) +
labs(
#title = main_title, subtitle = paste0("Station: ", station),
y = "Biomass (g)", x = "Sampling time_var"
) +
ggrepel::geom_label_repel(aes(label = label),
size = 2.5, nudge_x = 1, na.rm = TRUE)
# Add total biomass
p2 <- p +
geom_line(data = total_dataset, aes(color = "black", size = 3)) +
theme(legend.position = "none")
return(p2)
}
plot_population(dataset = ti, y_var = "abundance", time_var = "year")
#> Warning: ggrepel: 12 unlabeled data points (too many overlaps). Consider
#> increasing max.overlaps
plot_temporal_population(com = ti, ribbon = FALSE)
p <- plot_temporal_population(com = ti, ribbon = TRUE)
GeomRibbon$handle_na <- function(data, params) { data }
p$data %>%
ggplot(
aes(y = abundance, ymin = ymin, ymax = ymax, x = year,
fill = species)
) +
geom_ribbon()
set.seed(1)
test <- data.frame(x = rep(1:10, 3), y = abs(rnorm(30)), z = rep(LETTERS[1:3],
10)) %>% arrange(x, z)
test[test$x == 4, "y"] <- NA
test$ymax <- test$y
test$ymin <- 0
zl <- unique(test$z)
for (i in 2:length(zl)) {
zi <- test$z == zl[i]
zi_1 <- test$z == zl[i - 1]
test$ymin[zi] <- test$ymax[zi_1]
test$ymax[zi] <- test$ymin[zi] + test$ymax[zi]
}
# fix GeomRibbon
GeomRibbon$handle_na <- function(data, params) { data }
ggplot(test, aes(x = x, y=y, ymax = ymax, ymin = ymin, fill = z)) +
geom_ribbon()
toy_dataset %>%
group_by(siteid, year, species) %>%
summarise(test=n()>1) %>%
filter(test)
pop_trends <- toy_dataset %>%
filter(!siteid %in% c("S534", "S8633")) %>%
group_by(siteid) %>%
nest() %>%
mutate(
p_pop = map(data, ~plot_temporal_population(com = .x, ))
)
#> Note: Using an external vector in selections is ambiguous.
#> i Use `all_of(species_var)` instead of `species_var` to silence this message.
#> i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
#> This message is displayed once per session.
#> Note: Using an external vector in selections is ambiguous.
#> i Use `all_of(y_var)` instead of `y_var` to silence this message.
#> i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
#> This message is displayed once per session.
#> Note: Using an external vector in selections is ambiguous.
#> i Use `all_of(species)` instead of `species` to silence this message.
#> i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
#> This message is displayed once per session.
pop_trends$p_pop
#> [[1]]
#> Warning: Removed 280 rows containing missing values (position_stack).
#>
#> [[2]]
#>
#> [[3]]
#> Warning: Removed 104 rows containing missing values (position_stack).
The operation are too time consuming, so I set eval = FALSE from here.
knitr::opts_chunk$set(eval=FALSE)
rvat_shp_files <- here("inst", "extdata", "RiverATLAS_v10_shp") %>%
list.files(., full.names = TRUE) %>%
.[stringr::str_detect(., "\\.shp")]
eu_shp <- rvat_shp_files[stringr::str_detect(rvat_shp_files, "eu.shp$")]
eu_shp_layers <- sf::st_layers(eu_shp, do_count = TRUE)
#<!--There are `r length(rvat_shp_files)`. The dataset is gigantic,-->
#<!--`r eu_shp_layers$field` variables...-->
tictoc::tic()
riveratlas_slice <- sf::read_sf(eu_shp, query = "SELECT * FROM RiverATLAS_v10_eu WHERE FID = 1")
tictoc::toc()
#313.564 sec elapsed
col_riveratlas <- colnames(riveratlas_slice)
stream_charac <- col_riveratlas[str_detect(col_riveratlas, "[A-Z]")]
# environmental variable begins by three character
env_stream <- str_sub(col_riveratlas[!col_riveratlas %in% stream_charac], 1, 3)
unique_env_type <- unique(env_stream)
tictoc::tic()
test <- sf::read_sf(eu_shp, query = "SELECT MAIN_RIV FROM RiverATLAS_v10_eu")
tictoc::toc()
#188.71 sec elapsed
loc <- get_site_location_as_sf()
ti <- st_crop(test,
c(
xmin = st_bbox(loc)["xmin"],
xmax = st_bbox(loc)["xmax"],
ymin = st_bbox(loc)["ymin"],
ymax = st_bbox(loc)["ymax"]
)
)
object.size(test) * 10^-6
tar_load(filtered_dataset)
sf::sf_use_s2(FALSE)
world <- ne_countries(returnclass = "sf") %>%
select(sov_a3)
filtered_dataset$location
loc <- filtered_dataset$location %>%
st_as_sf(coords = c("longitude", "latitude"), crs = 4326)
# Probleme de coordonées en Suède et en Angleterre
overlap_mask <- st_intersects(loc, world)
overlap_mask <- st_is_within_distance(loc, world, dist = 100)
mask_country_loc <- map_lgl(overlap_mask, any)
plot(st_geometry(world %>% st_crop(st_bbox(loc[!mask_country_loc, "siteid"]))))
plot(st_geometry(loc[!mask_country_loc, "siteid"]), add = TRUE)
mask_world <- map_int(overlap_mask[mask_country_loc], 1) %>% unique
selected_country <- world[mask_world, ]
plot(st_geometry(selected_country))
its_rv_country <- st_intersects(test, selected_country)
mask_river_country <- map_lgl(its_rv_country, any)
sum(mask_river_country) / length(mask_river_country)
map_dbl(list(test[mask_river_country, ], test), object.size) %>% reduce(., `/`)
#https://github.com/r-spatial/sf/issues/792
#https://stackoverflow.com/questions/51292952/snap-a-point-to-the-closest-point-on-a-line-segment-using-sf
snapped_pt <- st_snap(
st_transform(loc, crs = 54008),
st_transform(test[mask_river_country, ], crs = 54008),
tolerance = 200
)
hull_station <- st_convex_hull(st_union(loc))
plot(st_geometry(world))
plot(st_geometry(hull_station), add = TRUE)
its_rv_st <- st_intersects(test, hull_station)
mask_river_atlas <- map_lgl(its_rv_st, any)
sum(mask_river_atlas) / length(mask_river_atlas)
map_dbl(list(test[mask_river_atlas, ], test), object.size) %>% reduce(., `/`)
?st_nearest_points
mask_country_loc <- map_lgl(overlap_mask, any)
plot(st_geometry(world %>% st_crop(st_bbox(loc[!mask_country_loc, "siteid"]))))
plot(st_geometry(loc[!mask_country_loc, "siteid"]), add = TRUE)
unit(10, units = "cm")
## datetime
Sys.time()
## repository
if(requireNamespace('git2r', quietly = TRUE)) {
git2r::repository()
} else {
c(
system2("git", args = c("log", "--name-status", "-1"), stdout = TRUE),
system2("git", args = c("remote", "-v"), stdout = TRUE)
)
}
## session info
sessionInfo()